"""
@File  :from_shard_get_raw.py
@Author:Xuelong Geng
@Date  :2024/6/13 13:02
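@Desc  :Load the shard list file and pass its entries to utils_file.do_uncompress_shard, together with a raw-wav output directory and wav.scp / text output paths.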
"""
from gxl_ai_utils.utils import utils_file

# ---------------------------------------------------------------------------
# Locations
# ---------------------------------------------------------------------------
# Path of the list file that is loaded below.
input_shard_list = "/home/work_nfs14/xlgeng/asr_data_shard/wenetspeech4tts/shards_list.txt"

# Destinations handed to do_uncompress_shard further down.
output_text_path = "/home/work_nfs14/xlgeng/asr_data_raw/wenetspeech4tts/text"
output_wav_path = "/home/work_nfs14/xlgeng/asr_data_raw/wenetspeech4tts/wav.scp"
output_wav_dir = "/home/work_nfs14/xlgeng/asr_data_raw/wenetspeech4tts/raw_wav"

# ---------------------------------------------------------------------------
# Run
# ---------------------------------------------------------------------------
# Prepare the wav output directory first, before the list file is read.
utils_file.makedir(output_wav_dir)

# The name is rebound here: from this point it holds whatever
# load_list_file_clean returned for the list file, not the file's path.
input_shard_list = utils_file.load_list_file_clean(input_shard_list)

# NOTE(review): presumably unpacks every listed shard into output_wav_dir and
# records the results in the wav.scp / text files, using 32 worker threads --
# confirm against the gxl_ai_utils implementation.
utils_file.do_uncompress_shard(
    shard_path_list=input_shard_list,
    output_dir=output_wav_dir,
    wav_path=output_wav_path,
    text_path=output_text_path,
    num_thread=32,
)


